/*
Paper title: "Impact of Juntos conditional cash transfer program on nutritional and cognitive outcomes in Peru: ///
Comparison between younger and older initial exposure"
Authors: Alan Sánchez, Guido Melendez, Jere Behrman
Code written by: Alan Sánchez & Guido Melendez
Note: this analysis uses datafile "index_sibR1R4", which contains merged data from different rounds of the Young Lives Study in Peru.
*/

clear all
set matsize 5000
set memory 4000m
set maxvar 20000
set more off

* insert directory here
loc ruta   "F:\JUNTOS\_1 files"
loc rutaf  "F:\JUNTOS\_2 results"
loc rutagr "F:\JUNTOS\_3 graphics"

***************************************************************************
*                                                                         *
*                                                                         *
*              		TABLES B.7 & B.8 OF APPENDIX B                        *
*                                                                         *
*                                                                         *
***************************************************************************

/* This do-file replicates tables B.7 & B.8 of Appendix B. */

cd "`ruta'"
use index_sibR1R4, clear
sort CHILDID

***************************************************************************
*                                                                         *
*                                                                         *
*               I. DEFINIFION OF ANALYTICAL SAMPLE                        *
*                                                                         *
*                                                                         *
***************************************************************************

*** Only families with paired-siblings are considered for the analisys
duplicates tag CHILDID,generate(duplicate)
tab duplicate
drop if duplicate==0

/*** There are 62 families where the index child has 2 siblings instead of 1. We drop one of them, as follows:
In 55 families the second siblings is older than the index children. In this case we retain the younger sibling.
In the other 7 cases we chose one sibling at random.
*/
replace dup=1 if CHILDID=="PE161080" | CHILDID=="PE201100"
drop if CHILDID=="PE201100" & ID==16
drop if CHILDID=="PE171035" & ID==5
drop if CHILDID=="PE031085" & ID==17
drop if CHILDID=="PE091051" & ID==18
drop if CHILDID=="PE161080" & ID==4
drop if CHILDID=="PE201100"	& ID==16
drop if CHILDID=="PE091078"	& ID==6
drop if ID!=0 & duplicate==2 & age_months_78==.
drop duplicate

***************************************************************************
*                                                                         *
*                                                                         *
*              II. CREATION OF KEY VARIABLES (part 1)                     *
*                                                                         *
*                                                                         *
***************************************************************************

* Create household ID as numeric variable
sort CHILDID
egen hh=group(CHILDID)

* Create age in years 
gen age_45=int(age_months_45/12)
replace age_45=4 if age_45==3
replace age_45=6 if age_45==7

gen age_78=int(age_months_78/12)
replace age_78=7 if age_78==5
replace age_78=10 if age_78==11

* Standardizing PPVT scores by age in years (baseline)
egen std1=std(PPVT_45) if age_months_45<42 & PPVT_45!=.
egen std2=std(PPVT_45) if age_months_45>=43 & age_months_45<=48 & PPVT_45!=.
egen std3=std(PPVT_45) if age_months_45>=49 & age_months_45<=54 & PPVT_45!=.
egen std4=std(PPVT_45) if age_months_45>=55 & age_months_45<=60 & PPVT_45!=.
egen std5=std(PPVT_45) if age_months_45>=61 & age_months_45<=66 & PPVT_45!=.
egen std6=std(PPVT_45) if age_months_45>=67 & age_months_45<=72 & PPVT_45!=.
egen std7=std(PPVT_45) if age_months_45>=73 & age_months_45<=79 & PPVT_45!=.
egen std8=std(PPVT_45) if age_months_45>=80 & PPVT_45!=.

gen std_PPVT_45=.
replace std_PPVT_45=std1 if age_months_45>=37 & age_months_45<=42
replace std_PPVT_45=std2 if age_months_45>=43 & age_months_45<=48
replace std_PPVT_45=std3 if age_months_45>=49 & age_months_45<=54
replace std_PPVT_45=std4 if age_months_45>=55 & age_months_45<=60
replace std_PPVT_45=std5 if age_months_45>=61 & age_months_45<=66
replace std_PPVT_45=std6 if age_months_45>=67 & age_months_45<=72
replace std_PPVT_45=std7 if age_months_45>=73 & age_months_45<=79
replace std_PPVT_45=std8 if age_months_45>=80 & PPVT_45!=. 

label var std_PPVT_45 "Standardized PPVT raw original score, 4-5 age"
drop std1 std2 std3 std4 std5 std6 std7 std8 

* Standardizing PPVT scores by age in years (follow-up)
egen std7=std(PPVT_78)  if age_months_78<78 & PPVT_78!=.
egen std8=std(PPVT_78)  if age_months_78>=79  & age_months_78<=84 & PPVT_78!=.
egen std9=std(PPVT_78)  if age_months_78>=85  & age_months_78<=90 & PPVT_78!=.
egen std10=std(PPVT_78) if age_months_78>=91  & age_months_78<=96 & PPVT_78!=.
egen std11=std(PPVT_78) if age_months_78>=97  & age_months_78<=102 & PPVT_78!=.
egen std12=std(PPVT_78) if age_months_78>=103 & age_months_78<=108 & PPVT_78!=.
egen std13=std(PPVT_78) if age_months_78>=109 & age_months_78<=114 & PPVT_78!=.
egen std14=std(PPVT_78) if age_months_78>=115 & age_months_78<=120 & PPVT_78!=. 
egen std15=std(PPVT_78) if age_months_78>=121 & age_months_78<=126 & PPVT_78!=. 
egen std16=std(PPVT_78) if age_months_78>=127 & PPVT_78!=.

gen std_PPVT_78=.
replace std_PPVT_78=std7  if age_months_78>=73  & age_months_78<=78
replace std_PPVT_78=std8  if age_months_78>=79  & age_months_78<=84
replace std_PPVT_78=std9  if age_months_78>=85  & age_months_78<=90
replace std_PPVT_78=std10 if age_months_78>=91  & age_months_78<=96
replace std_PPVT_78=std11 if age_months_78>=97  & age_months_78<=102
replace std_PPVT_78=std12 if age_months_78>=103 & age_months_78<=108
replace std_PPVT_78=std13 if age_months_78>=109 & age_months_78<=114
replace std_PPVT_78=std14 if age_months_78>=115 & age_months_78<=120 
replace std_PPVT_78=std15 if age_months_78>=121 & age_months_78<=126 
replace std_PPVT_78=std16 if age_months_78>=127  
label var std_PPVT_78 "Standardized PPVT raw original score, 7-8 age"
drop std7 std8 std9 std10 std11 std12 std13 std14 

* Severe stunting
gen estunted_45=0
replace estunted_45=1 if zhfa_45<-3
replace estunted_45=. if zhfa_45==.
label var estunted_45 "Child is severely stunted, 4-5 age"

gen estunted_78=0
replace estunted_78=1 if zhfa_78<-3 
replace estunted_78=. if zhfa_78==.
label var estunted_78 "Child is extremely stunted, 7-8 age"

* Variables associated to Juntos elegibility rule (round 2)
clonevar tipom2R2=tipom2_45 if sib==0
clonevar tipom3R2=tipom3_45 if sib==0
clonevar tipom4R2=tipom4_45 if sib==0
clonevar serv3R2=serv3_45 if sib==0
clonevar n_equipR2=n_equip_45 if sib==0
clonevar combustR2=combust_45 if sib==0
clonevar women_weducR2=women_weduc_45 if sib==0
clonevar edu_men614R2=edu_men614_45 if sib==0
clonevar edu_men617R2=edu_men617_45 if sib==0
clonevar age_caregiverR2=age_caregiver_45 if sib==0
clonevar HHSIZER2=HHSIZE_45 if sib==0

foreach x in altitudeR2 tipom2R2 tipom3R2 tipom4R2 serv3R2 n_equipR2 combustR2 women_weducR2 edu_men614R2 edu_men617R2 age_caregiverR2 HHSIZER2 hh_agricR2 hh_noactR2 {
replace `x'=0 if `x'==.
bys CHILDID: egen junk`x'=sum(`x')
replace `x'=junk`x'
drop junk`x'
}

* Diferences in HAZ, sibling

foreach y in std_PPVT zhfa estunted stunted {
gen dif`y'=`y'_78-`y'_45 
} 

label var difstd_PPVT "Change in standardized ppvt score"
label var difzhfa "Change in haz"
label var difstunted "Change in stunting"
label var difstunted "Change in stunting"

order CHILDID ID sib ybirth mbirth monthdob METH MUMED CAREED CETH mum_analf care_analf CLUSTIDR1 zhfaR1 ///
altitudeR1 tipom2R1 tipom3R1 tipom4R1 serv3R1 n_equipR1 ///
combustR1 women_weducR1 edu_men614R1 edu_men617R1 HHSIZER1 age_caregiverR1 hh_agricR1 hh_noactR1 ///
altitudeR2 tipom2R2 tipom3R2 tipom4R2 serv3R2 n_equipR2 ///
combustR2 women_weducR2 edu_men614R2 edu_men617R2 HHSIZER2 age_caregiverR2 hh_agricR2 hh_noactR2 female CLANG


***************************************************************************
*                                                                         *
*                                                                         *                                                                         *
*            III. RESHAPE OF DATABASE FOR PANEL DATA ANALYSIS             *
*                                                                         *
*                                                                         *
***************************************************************************

reshape long date_int_ year_ rural_ electricity_ water_ drain_ serv3_ floor_ roof_ wall_ combust_ n_equip_ hq_ ///
cd_ sv_ wi_ tipom1_ tipom2_ tipom3_ tipom4_ totalexp_rpc_ HHSIZE_ edu_men614_ edu_men617_ women_weduc_ ///
age_mother_ age_caregiver_ age_months_ zhfa_ stunted_ PPVT_ age_ std_PPVT_ estunted_ ATTNDSCHL_ GRADE_ ppvtlang_ ///
, i(CHILDID sib) j(round)

foreach x in date_int year rural electricity water drain serv3 floor roof wall combust n_equip hq ///
cd sv wi tipom1 tipom2 tipom3 tipom4 totalexp_rpc HHSIZE edu_men614 edu_men617 women_weduc ///
age_mother age_caregiver age_months zhfa stunted PPVT age std_PPVT estunted ATTNDSCHL GRADE ppvtlang ///
{
rename `x'_ `x'
}
 
label var date_int "Date of interview (in months)"
label var year "Year of interview"
label var rural "Area of residence is rural"
label var electricity "Dwelling has electricity"
label var water "Main source of drinking water is Piped into dwelling/yard/plot"
label var drain "Type of toilet facility is Flush toilet/septic tank in dwelling"
label var serv3 "Number of basic public services" 
label var floor "Main floor material is high quality material" 
label var roof "Main roof material is high quality material"
label var wall "Main wall material is high quality material"
label var combust "Main fuel for cooking is gas, electricity or kerosene"
label var n_equip "Number of assets household doesn't have"
label var hq "Housing queality index"
label var cd "Consumer durables index"
label var sv "Access to services index"
label var wi "Wealth index"
label var tipom1 "Type of housing quality: very low"
label var tipom2 "Type of housing quality: low"
label var tipom3 "Type of housing quality: medium"
label var tipom4 "Type of housing quality: high"
label var totalexp_rpc "Total real consumption per capita - base 2006"
label var HHSIZE "Household size"
label var edu_men614 "Percentage of children who are in full-time education (6-17)"
label var edu_men617 "Percentage of children who are in full-time education (6-14)"
label var women_weduc "Percentage of adult women without formal education"
label var age_mother "Age of mother (in years)"
label var age_caregiver "Age of mother (in years)" 
label var age_months "Age of child in months"
label var zhfa "Heigh-for-age z score"
label var stunted "Child is stunted"
label var PPVT "PPVT original raw score"
label var age "Age of child (in years)"
label var std_PPVT "Standardized PPVT original raw score"
label var estunted "Child is extremely stunted"
label var ATTNDSCHL "Are you currenty in full-time education?"
label var GRADE "Highest grade attainment or current school grade"
label var ppvtlang "Language used by child during PPVT test"

replace round=0 if round==45
replace round=1 if round==78
label define round 0 "Before" 1 "After"
label values round round
format %10.0g round
label var round "After treatment"

egen RCHILDID =concat(CHILDID sib)
egen nRCHILDID=group(RCHILDID)
egen cid=group(CHILDID)

* Defining as a panel

sort CHILDID sib
egen pid=group(CHILDID sib)

xtset pid round

***************************************************************************
*                                                                         *
*                                                                         *
*              IV. CREATION OF KEY VARIABLES (part 2)                     *
*                                                                         *
*                                                                         *
***************************************************************************

* Months of exposure
gen months_exposed=date_int-datenrolljuntos
replace months_exposed=0 if months_exposed<0   
replace months_exposed=0 if rec_juntos==0
gen junk=months_exposed/12
gen years_exposed=round(junk)
replace years_exposed=0 if rec_juntos==0
tab years_exposed round if sib==0
tab years_exposed round if sib==1

drop junk 
gen junk=years_exposed
replace junk=0 if round==1
bys CHILDID sib: egen junk2=max(junk)
replace junk2=0 if sib==0
replace junk2=0 if round==0
tab junk2 round if sib==0
tab junk2 round if sib==1
rename junk2 years_exposed2
label var years_exposed2 "Years exposed to JUNTOS in round 0"

* Age of months during the first month ox exposition to JUNTOS
gen agemonths_firstT=datenrolljuntos- monthdob
replace agemonths_firstT=0 if agemonths_firstT<0 & agemonths_firstT!=.  
label var agemonths_firstT "Age in months during the first month of exposition to JUNTOS"

* Families enrolled to JUNTOS in 2005 and 2006 (not to be considered because the lack of baseline for index children)
gen baselinejuntos=1
replace baselinejuntos=0 if yenrolljuntos==2005 | yenrolljuntos==2006

* Age in semesters
gen semester4=(age_months<=24) & round==0
gen semester5=(age_months>24 & age_months<=30) & round==0
gen semester6=(age_months>30 & age_months<=36) & round==0
gen semester7=(age_months>36 & age_months<=42) & round==0
gen semester8=(age_months>42 & age_months<=48) & round==0
gen semester9=(age_months>48 & age_months<=54) & round==0
gen semester10=(age_months>54 & age_months<=60) & round==0
gen semester11=(age_months>60 & age_months<=66) & round==0
gen semester12=(age_months>66 & age_months<=72) & round==0
gen semester13=(age_months>72 & age_months<=78) & round==0
gen semester14=(age_months>78 & age_months<=92) & round==0

* Groups of Juntos' families
gen juntosgroup=. 
replace juntosgroup=1 if juntosuntilR3==1 | juntosuntilR3==2
replace juntosgroup=2 if juntosuntilR3==0 & (juntosuntilR4==1 | juntosuntilR4==2)
replace juntosgroup=3 if juntosuntilR3==0 & juntosuntilR4==0
label var juntosgroup "Type of Juntos's treatment R2-R3" 
label define juntosgroup 1 "Treated R2-R3" 2 "Treated R3-R4" 3 "Never treated" 
label values juntosgroup juntosgroup 

* Siblings first exposed to JUNTOS at age 5 or above are not considered in the sample
gen years03=1
replace years03=0 if sib==1 & age_enrollment==5

tab age_enrollment if sib==1 & juntosgroup==1 & years03==1 & round==0 & baselinejuntos==1

* Create a dummy equal 1 if child recieves Juntos in R2-R3 and is observed after treatment
gen aftjuntosR3=rec_juntos*round
label var aftjuntosR3 "Child receives Juntos in R3, observed after treatment"

* Family recieves Juntos in R3-R4
gen juntosr4=.
replace juntosr4=0 if juntosuntilR4==0 & juntosuntilR3==0  
replace juntosr4=1 if juntosuntilR3==0 & (juntosuntilR4==1 | juntosuntilR4==2)
label var juntosr4 "Family recieves Juntos in R4"

* Create a dummy equal 1 if child recieves Juntos in R3-R4 and is observed after treatment
gen aftjuntosR4=juntosr4*round
label var aftjuntosR4 "Child receives Juntos in R4, observed after treatment"

* Create a dummy equal 1 if child recieves Juntos & is observed after treatment & is a younger sibling
gen aftjuntosR3sib=rec_juntos*round*sib
label var aftjuntosR3sib "Child receives Juntos, observed after treatment, younger siblings"

label var zhfaR1 "Index' height-for-age z score, round 1"

label var female "Child is female"
 
***************************************************************************
*                                                                         *
*                                                                         *
*                     V. REGRESSION ANALYSIS                              *
*                                                                         *
*                                                                         *
***************************************************************************                                                                        *

* Defining globals

global household "MUMED CETH wiR1"
global child "female i.age"
global child2 "female i.age i.ppvtlang"
global district "prom_nbi"
global cluster "CHILDID"
global clusterid "ubigeoR1"
global condition "quintil!=."
global sample_j1 "sample==1"
global sample_j2 "cogsample==1"
global datejuntos "baselinejuntos==1"
global criticalperiod "years03==1"
global cogcontrol "i.ppvtlang"
global treatmentr3      "rec_juntos round                 aftjuntosR3"
global fetreatmentr3    "           round                 aftjuntosR3"
global treatmentr3s     "rec_juntos round                 aftjuntosR3 years_exposed2"
global fetreatmentr3s   "           round                 aftjuntosR3 years_exposed2"
global treatmentr3s_all "rec_juntos round                 aftjuntosR3 years_exposed2"
global treatmentr4      "juntosr4   round aftjuntosR4"
global fetreatmentr4    "           round aftjuntosR4"

* Defining sample for nutrition
keep if $criticalperiod 
xi: areg zhfa $household $child $district $treatmentr3 if $condition & baselinejuntos, abs($cluster)
gen junksample=e(sample)

duplicates tag CHILDID if junksample==1, gen(rep)
tab rep
gen sample=0
replace sample=1 if rep==3

tab rec_juntos if rep==3 & round==0 & sib==0 & (juntosgroup==1 | juntosgroup==3)

drop junksample rep

* Defining sample for cognitive outcomes

// Those children who took the PPVT in Spanish
tempvar junkppvtlang0
gen `junkppvtlang0'=ppvtlang if round==0
tempvar ppvtlang0
bys pid: egen `ppvtlang0'=min(`junkppvtlang0')

tempvar junkppvtlang1
gen `junkppvtlang1'=ppvtlang if round==1
tempvar ppvtlang1
bys pid: egen `ppvtlang1'=min(`junkppvtlang1')

gen bothspanishppvt=(`ppvtlang0'==4 & `ppvtlang1'==4)
replace bothspanishppvt=. if PPVT==.
label var bothspanishppvt "Language used by child during PPVT test is Spanish in both rounds"

// Final sample
reg std_PPVT zhfa $household $child $district $treatmentr3 if $condition & baselinejuntos, abs($cluster)
*reg std_PPVT $household $child $district if $condition & baselinejuntos, abs($cluster)
gen junksample=e(sample)

duplicates tag CHILDID sib if junksample==1, gen(rep) 
tab rep
gen cogsample=0
replace cogsample=1 if rep==1
replace cogsample=0 if cogsample==1 & bothspanishppvt==0
drop junksample rep

gen juntosgroup1=0
replace juntosgroup1=1 if juntosgroup==1

// Enrollment of the siblings
tab age_enrollment if sib==1
gen enroll_0_2=0
gen enroll_3_4=0
replace enroll_0_2=1 if sib==1 & age_enrollment==0
replace enroll_0_2=1 if sib==1 & age_enrollment==1
replace enroll_0_2=1 if sib==1 & age_enrollment==2
replace enroll_3_4=1 if sib==1 & age_enrollment==3
replace enroll_3_4=1 if sib==1 & age_enrollment==4 

tab enroll_0_2
tab enroll_3_4


***********************************************************************************
*                                                                                 *
*                                                                                 *
*                        TABLES B.7 OF APPENDIX B 		                          *
*                                                                                 *
*                                                                                 *
***********************************************************************************


************************************************************************************
* Stunting Sibling, comparing treated in R2-R3 and never treated 
************************************************************************************

* III. Child fix effects
xi: xtreg stunted  $fetreatmentr3s $household $child      if sib==1 & $sample_j1 & (juntosgroup==1 | juntosgroup==3), fe cluster($cluster)
estimates store stunting1

/* it is important to note that the coefficients associated with the duration of exposure and program impact tend 
to have opposite signs, which suggests a high degree of collinearity between the two variables */
corr aftjuntosR3 years_exposed2 if sib==1 & $sample_j1 & (juntosgroup==1 | juntosgroup==3) 

corr rec_juntos years_exposed2 if sib==1 & $sample_j1 & (juntosgroup==1 | juntosgroup==3) 

************************************************************************************
* Severe stunting Sibling, comparing treated in R2-R3 and never treated 
************************************************************************************

* III. Child fix effects
xi: xtreg estunted  $fetreatmentr3s $household $child        if sib==1 & $sample_j1 & (juntosgroup==1 | juntosgroup==3), fe cluster($cluster)
estimates store sstunting1

************************************************************************************
* HAZ Sibling, comparing treated in R2-R3 and never treated 
************************************************************************************

* III. Child fix effects
xi: xtreg zhfa  $fetreatmentr3s $household $child      if sib==1 & $sample_j1 & (juntosgroup==1 | juntosgroup==3), fe cluster($cluster)
estimates store haz1

************************************************************************************
* PPVT Sibling, comparing treated in R2-R3 and never treated
************************************************************************************

* III. Child fix effects
xi: xtreg std_PPVT  $fetreatmentr3s $household $child2      if $sample_j2 & sib==1 & (juntosgroup==1 | juntosgroup==3), fe cluster($cluster)
estimates store ppvt1

* export results in table excel  
xml_tab stunting1 sstunting1 haz1 ppvt1, replace sheet(tableB7) ///
save("`rutaf'\tableB7_to_B8_appendixB.xls") title("Effects of first expansion of Juntos Program, Younger Siblings") below stats(N r2 r2_a) ///
keep (round aftjuntosR3 years_exposed2)  

***********************************************************************************
*                                                                                 *
*                                                                                 *
*                          TABLES B.8 OF APPENDIX B 	                              *
*                                                                                 *
*                                                                                 *
***********************************************************************************

* creating the dummy of timming
gen time=1
replace time=2 if round==1 & sib==0
replace time=2 if round==0 & sib==1
replace time=3 if round==1 & sib==1
label define timex 1 "Round 2" 2 "Round 3" 3 "Round 4"
label values time timex 
label var time "Round"

tab time, gen(time_)
rename time_1 round2
rename time_2 round3
rename time_3 round4

* estimators of interest  
gen round3xjuntos=rec_juntos*round3
label var round3xjuntos "beneficiary*round 3"

gen round4xjuntos=rec_juntos*round4
label var round4xjuntos "beneficiary*round 4"

gen round4xjuntos_0_2=rec_juntos*round4*enroll_0_2
gen round4xjuntos_3_4=rec_juntos*round4*enroll_3_4

global juntos "round3 round4 rec_juntos round3xjuntos round4xjuntos years_exposed2"
global test "round3xjuntos=round4xjuntos"

matrix M2=J(1,4,.)
matrix colnames M2= "hatstunted" "hatestunted" "hathaz" "hatppvt" 
matrix rownames M2= "p-value (c4=c5)"

* 1. Stunting
xi: xtreg stunted $juntos $household  $child $district  if $sample_j1 & (juntosgroup==1 | juntosgroup==3), cluster($cluster) fe
estimates store test1stunt
test $test
local pvalue1=r(p)
* 2. Severe stunting
xi: xtreg estunted $juntos $household $child $district  if $sample_j1 & (juntosgroup==1 | juntosgroup==3), cluster($cluster) fe
estimates store test1extstunt
test $test
local pvalue2=r(p)
* 3. HAZ
xi: xtreg zhfa     $juntos $household $child $district  if $sample_j1 & (juntosgroup==1 | juntosgroup==3), cluster($cluster) fe
estimates store test1haz
test $test
local pvalue3=r(p)
* 4. PPVT
xi: xtreg std_PPVT $juntos $household $child2 $district if $sample_j2 & (juntosgroup==1 | juntosgroup==3), cluster($cluster) fe
estimates store test1ppvt
test $test
local pvalue4=r(p)

xml_tab test1stunt test1extstunt test1haz test1ppvt, append sheet(tableB8) ///
save("`rutaf'\tableB7_to_B8_appendixB.xls") title("Pooled estimates") below stats(N r2 r2_a) ///
keep ($juntos)

matrix M2[1,1]=`pvalue1'
matrix M2[1,2]=`pvalue2'
matrix M2[1,3]=`pvalue3'
matrix M2[1,4]=`pvalue4'

xml_tab M2, append sheet(test_tableB8) save ("`rutaf'\tableB7_to_B8_appendixB.xls") title("P value") 
